import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
import os

# --- Data loading and feature engineering ---------------------------------
# Relative paths below are resolved against this working directory.
os.chdir("C:/Users/agusv/Desktop/Estudio/Tesis/Csv")

# Read the price table and index it by its parsed 'Date' column.
hvo100_data = pd.read_csv("multivariate_hvo100.csv", parse_dates=['Date']).set_index('Date')

# Lagged copies of the price (shifted by 1, 2 and 3 rows). The first k rows
# of each 'lag_k' column are NaN because there is nothing to shift in.
for rows_back, lag_column in enumerate(['lag_1', 'lag_2', 'lag_3'], start=1):
    hvo100_data[lag_column] = hvo100_data['HVO100 Price'].shift(rows_back)

# Only observations dated 2022 or later form the training set.
from_2022_onwards = hvo100_data.index.year >= 2022
train_data = hvo100_data[from_2022_onwards]

y_training = train_data['HVO100 Price']
X_training = train_data[['lag_1', 'lag_2', 'lag_3']]

# One ARIMA(0, 1, 0) model per lag column, fitted on the full history rather
# than on the training window. The seasonal order is all zeros, so the period
# of 12 contributes no seasonal terms.
sarimax_lag1, sarimax_lag2, sarimax_lag3 = (
    SARIMAX(
        hvo100_data[model_column],
        order=(0, 1, 0),
        seasonal_order=(0, 0, 0, 12),
    ).fit(disp=False)
    for model_column in ('lag_1', 'lag_2', 'lag_3')
)

# Disabled alternative, kept only as an inert string literal (it is never
# executed): choosing the model orders with pmdarima's auto_arima instead of
# the fixed orders used above.
# NOTE(review): it cannot simply be re-enabled at this position, because it
# refers to `residuals`, which is only assigned further down in the script.
"""
from pmdarima import auto_arima
def fit_auto_arimax(series, exog=None, seasonal=True, m=12):
    auto_model = auto_arima(
        series,
        exogenous=exog,
        seasonal=seasonal,
        m=m,
        trace=True,
        error_action="ignore", 
        suppress_warnings=True,
        stepwise=True,
        n_jobs=-1
    )
    return auto_model

# Ajustar modelos auto ARIMAX para cada variable
sarimax_lag1 = fit_auto_arimax(hvo100_data['lag_1'].dropna(), seasonal=True, m=12)
sarimax_lag2 = fit_auto_arimax(hvo100_data['lag_2'].dropna(), seasonal=True, m=12)
sarimax_lag3 = fit_auto_arimax(hvo100_data['lag_3'].dropna(), seasonal=True, m=12)
sarimax_lag4 = fit_auto_arimax(residuals, seasonal=True, m=12)

"""


# --- Forecast horizon and future regressors -------------------------------
# Forecast month by month, from the month after the last observation through
# December 2030, so that the final year is covered in full. Counting whole
# years only ((2030 - year) * 12) ends the horizon part-way through 2030
# whenever the data does not end in December; adding the months left in the
# last observed year fixes that and is identical for December-ending data.
last_observation = hvo100_data.index[-1]
future_steps = (2030 - last_observation.year) * 12 + (12 - last_observation.month)
future_index = pd.date_range(
    start=last_observation + pd.DateOffset(months=1),
    periods=future_steps,
    freq='MS',
)

# Point forecasts of each lagged-price series over the whole horizon.
lag1_forecast = sarimax_lag1.get_forecast(steps=future_steps).predicted_mean
lag2_forecast = sarimax_lag2.get_forecast(steps=future_steps).predicted_mean
lag3_forecast = sarimax_lag3.get_forecast(steps=future_steps).predicted_mean

# The forecast Series are aligned to future_index by index label, not by
# position; labels missing from a forecast become NaN.
# NOTE(review): assumes the models' forecast index consists of the same
# month-start dates as future_index - confirm against the CSV's date column.
future_vars = pd.DataFrame({
    'Lag 1 Forecast': lag1_forecast,
    'Lag 2 Forecast': lag2_forecast,
    'Lag 3 Forecast': lag3_forecast
}, index=future_index)

# --- Hybrid forecast: linear regression on the lags + SARIMAX on its errors
linear_model = LinearRegression().fit(X_training, y_training)

# scikit-learn compares DataFrame column names at predict time with the ones
# seen during fit. future_vars carries display names, so relabel its columns
# (same order: lag 1, lag 2, lag 3) with the training feature names.
future_features = future_vars.set_axis(X_training.columns, axis=1)
linear_predictions = linear_model.predict(future_features)

# In-sample errors of the linear model; a second model forecasts these.
residuals = y_training - linear_model.predict(X_training)

sarimax_residuals = SARIMAX(residuals, order=(2, 1, 2), seasonal_order=(1, 1, 2, 12)).fit(disp=False)

residuals_forecast = sarimax_residuals.predict(start=future_index[0], end=future_index[-1])

# Combined forecast (a Series, because residuals_forecast is one).
# NOTE(review): the constant 0.3 offset is applied to every forecast value;
# its justification is not visible in this script - confirm and document.
final_predictions = linear_predictions + residuals_forecast + 0.3

# Replace any non-positive forecast with the preceding (possibly already
# replaced) value. The first element is never checked. Access is explicitly
# positional via .iloc: plain integer keys on a datetime-indexed Series rely
# on a positional fallback that pandas has deprecated.
for position in range(1, len(final_predictions)):
    if final_predictions.iloc[position] <= 0:
        final_predictions.iloc[position] = final_predictions.iloc[position - 1]

# Band of +/- one (population) standard deviation of the in-sample residuals.
std_dev_residuals = np.std(residuals)
upper_bound = final_predictions + 1 * std_dev_residuals
lower_bound = final_predictions - 1 * std_dev_residuals

# Draw the observed prices, the forecast line and the shaded band between
# lower_bound and upper_bound on a single set of axes, then show the figure.
_, price_axes = plt.subplots(figsize=(14, 6))
price_axes.plot(
    hvo100_data['HVO100 Price'],
    label="Real HVO100 Price",
    color="lightseagreen",
)
price_axes.plot(
    future_index,
    final_predictions,
    label="Linear + SARIMAX Forecast",
    color="red",
    linestyle="--",
)
price_axes.fill_between(future_index, lower_bound, upper_bound, color='orange', alpha=0.3)
price_axes.set_xlabel("Date")
price_axes.set_ylabel("HVO100 Price [€/lt]")
price_axes.set_title("HVO100 Price Prediction (Linear + SARIMAX)")
price_axes.legend()
plt.show()

# Collect the forecast and its band in one table, one row per forecast date.
forecast_columns = {
    'Date': future_index,
    'Predicted HVO100 Price': final_predictions,
    'Upper Bound': upper_bound,
    'Lower Bound': lower_bound,
}
predictions_df = pd.DataFrame(forecast_columns)

# Average of the predicted price over the rows dated in 2030.
falls_in_2030 = predictions_df["Date"].dt.year == 2030
mean_HVO_2030 = predictions_df[falls_in_2030]['Predicted HVO100 Price'].mean()
